getData <- function(x) {

  # get single filled circle shape and convert green to orange for metals in biota
  
  x$timeSeries <- within(x$timeSeries, {
    shape[shape %in% c("small_filled_circle", "large_filled_circle")] <- "filled_circle"
    shape[shape %in% "small_open_circle"] <- "open_circle"
    
    if (x$ID == "biota Metals")
      colour[colour %in% "green"] <- "orange"
  })
  

  # get local copies for use in constructing trend and status data frames
    
  AC <- x$AC
  assessment <- x$assessment
  timeSeries <- x$timeSeries

  
  # get status stations - just keep those with a parametric fit
  # NB some filled circles will disappear - these are the ones for which status was assessed using a 
  # non-parametric test
  
  ok <- with(timeSeries, paramFit)
  if (!any(ok)) return(x)
  
  timeSeries <- droplevels(timeSeries[ok, ])
  assessment <- assessment[row.names(timeSeries)]

  
  # useful variables for later
  
  varID <- intersect(
    c("determinand", "country", "station", "species", "OSPARregion", "MSFDregion", "region", 
      "offshore", "MSTAT", "latitude", "longitude", "colour", "shape"), 
    names(timeSeries))
  
    
  # get summary status information
  
  if (x$ID == "biota Imposex") { 

    Status <- sapply(assessment, USE.NAMES = TRUE, simplify = FALSE, FUN = function(assessment) {
      out <- tail(assessment$pred, 1)
      out$index <- NULL # only in some time series!
      cbind(out, assessment$summary[c("meanLY", "clLY", AC)])
    })

    Status <- do.call(rbind, Status)
    
    Status[varID] <- timeSeries[row.names(Status), varID]
    
    warning(
      "ad-hoc approach to calculating standard errors; need to get these in proportional odds model", 
      call. = FALSE)
    
    Status <- within(Status, {
      fit <- sqrt(meanLY)
      se <- (sqrt(clLY) - sqrt(meanLY)) / 1.645
    })
    
    if (!is.null(AC)) Status[paste("status", AC, sep = ".")] <- Status$fit / sqrt(Status[AC])
    
    # se.status is based on an AC of 1 - will need to adjust in Genstat (too complicated to do that here!)
    # also ad-hoc adjustment to very small standard errors (usually when trend diving to minus infinity)
    # made as larges as the smallest standard error, when estimated values is above the BAC
    
    warning("ad-hoc adjustment to small standard errors", call. = FALSE)
    
    Status <- within(Status, {
      status <- fit
      se.status <- pmax(se, min(se[status > BAC], na.rm = TRUE))
      station <- factor(station)
      determinand <- factor(as.character(determinand), levels = x$detID)
    })
    
  } else {
    
    Status <- sapply(assessment, USE.NAMES = TRUE, simplify = FALSE, FUN = function(assessment) 
      cbind(tail(assessment$pred, 1), assessment$summary[AC]))

    Status <- do.call(rbind, Status)
    
    Status[varID] <- timeSeries[row.names(Status), varID]
    
    if (!is.null(AC)) Status[paste("status", AC, sep = ".")] <- Status$fit - log(Status[AC])
    
    Status <- within(Status, {
      status <- fit
      se.status <- se
      station <- factor(station)
      determinand <- factor(as.character(determinand), levels = x$detID)
    })
    
    # if biota, ensure all status estimates are on a wet weight basis 
    
    if (substring(x$ID, 1, 5) == "biota") {
      Status$status <- with(Status, {
        group <- get.info("determinand", determinand, "group", "biota")
        basis <- get.basis("biota", x$purpose, group, species)
        family <- as.character(get.info("species", species, "family"))
        
        drywt <- get.info("species", species, "SB.DRYWT%", na.action = "output.ok")
        li.lipidwt <- get.info("species", species, "LI.LIPIDWT%", na.action = "output.ok")
        mu.lipidwt <- get.info("species", species, "MU.LIPIDWT%", na.action = "output.ok")
        sb.lipidwt <- get.info("species", species, "SB.LIPIDWT%", na.action = "output.ok")
        
        in.muscle <- determinand %in% "HG" | 
          (species %in% c("Clupea harengus", "Zoarces viviparus") & !(group %in% c("metal", "organoMetal")))
        lipidwt <- ifelse(family == "Fish", ifelse(in.muscle, mu.lipidwt, li.lipidwt), sb.lipidwt)
        
        convert.basis(status, basis, rep("W", length(status)), "drywt", "", lipidwt, "")
      })
    }
    
    # now change newColour in timeSeries so that it is orange if the point estimate is below the 
    # EAC (or equivalent), but the upper confidence limit is above 
    
    # EACid <- paste0("status.", c("ERL", "EAC", "EC"))
    # EACid <- EACid[EACid %in% names(Status)]
    
    # if (length(EACid)) {
    #  Status <- within(Status, {
    #    newColour <- ifelse(newColour %in% "red" & get(EACid) < 0, "orange", newColour)
    #  })
    #  x$timeSeries[row.names(Status), "newColour"] <- Status$newColour
    #}
    
  }
        
  x$status <- droplevels(Status)
  


  # now look at only time series with trend information (given by nypos) 
  
  ok <- with(timeSeries, nypos >= 5)
  if (!any(ok)) return(x)
  
  timeSeries <- droplevels(timeSeries[ok, ])
  assessment <- assessment[row.names(timeSeries)]
  
  
  if (x$ID == "biota Imposex") { 

    Trend <- sapply(assessment, USE.NAMES = TRUE, simplify = FALSE, function(assessment) {
      if ("contrasts" %in% names(assessment)) {
        out <- assessment$contrasts["recent", ]
        out <- within(out, {
          trend <- estimate / (end - start)
          se.trend <- se / (end - start)
        })      
        out <- out[c("trend", "se.trend")]
      } 
      else {
        out <- assessment$coefficients["year", c("Estimate", "Std. Error")]
        names(out) <- c("trend", "se.trend")
        out
      }
    })
    
    Trend <- do.call(rbind, Trend)
    
    Trend[varID] <- timeSeries[row.names(Trend), varID]
    
    Trend <- within(Trend, {
      
      # look at on exponential scale to avoid extreme low value having an undue effect (perhaps)
      # but some extreme trends then have very small standard errors, so have to make an
      # ad-hoc adjustment to get predictions
      
      trend <- exp(trend)
      se.trend <- se.trend * trend
      se.trend <- pmax(se.trend, min(se.trend[trend > 0.1], na.rm = TRUE))
      
      station <- factor(station)
      determinand <- factor(as.character(determinand), levels = x$detID)
    })

    message("Dropping baseline stations for regional trend assessment")
    
    Trend <- Trend[Trend$MSTAT %in% c("RH", "IH"), ]

  } else {

    Trend <- sapply(assessment, USE.NAMES = TRUE, simplify = FALSE, function(assessment)
      assessment$contrasts["recent", ])
    
    Trend <- do.call(rbind, Trend)
    
    Trend[varID] <- timeSeries[row.names(Trend), varID]

    Trend <- within(Trend, {
      trend <- 100 * estimate / (end - start)
      se.trend <- 100 * se / (end - start)
      station <- factor(station)
      determinand <- factor(as.character(determinand), levels = x$detID)
    })

    message("Dropping baseline and impacted stations for regional trend assessment")
    
    Trend <- Trend[Trend$MSTAT %in% "RH", ]
    
  }
   
  x$trend <- droplevels(Trend)
  
  x
}


regionalData <- function(assessment, type = c("initialise", "finalise")) {
  
  isMetals <- strsplit(assessment$ID, " ")[[1]][2] == "Metals"
  AC <- assessment$AC
  
  dataID <- switch(
    type, 
    initialise = c("trend", "status"), 
    finalise = c("regionalTrend", "regionalStatus")
  )
  
  assessment[c("regionalTrend", "regionalStatus")] <- lapply(dataID, function(id) {
    
    if (is.null(assessment[[id]])) return(NULL)
    
    data <- assessment[[id]]
    
    # if status file passed in, remove any rows with no status values - can happen e.g. with Crangon where
    # there is a black colour (so a status value but no ACs), but the status value is on an alternative basis
    # and can't be transformed to the common basis
    
    if (id == "status") {
      varID <- "status"
      if (!is.null(AC)) 
        varID <- c(varID, paste("status", AC, sep = "."))
    
      ok <- apply(!is.na(data[varID]), 1, any)
      data <- data[ok, ]
    }

    # must have at least three unique sampling locations in each region
    # NB can't use station because (conceivably) different countries could monitor at the same spot
    
    coverage <- with(unique(data[c("latitude", "longitude", "region")]), table(region))
    ok <- names(coverage)[coverage >= 3]
    data <- droplevels(data[data$region %in% ok, ])

    # must have at least three unique sampling locations for each determinand over all regions
        
    coverage <- with(unique(data[c("latitude", "longitude", "determinand")]), table(determinand))
    ok <- names(coverage)[coverage >= 3]
    data <- droplevels(data[data$determinand %in% ok, ])
    
    stopifnot(with(data, table(region) >= 3), with(data, table(determinand) >= 3))  

    # extra reduction for metals, because we don't analyse them as a random effect (assume 
    # they arise from different processes)
        
    if (isMetals) {
      det.reg <- with(unique(data[c("latitude", "longitude", "determinand", "region")]), 
                      paste(determinand, region))
      coverage <- table(det.reg)
      ok <- names(coverage)[coverage >= 3]
      data <- droplevels(data[with(data, paste(determinand, region)) %in% ok, ])
    }

    data
  })
  
  assessment
}




getMetaData <- function(compartmentID, detGroups, byGroup = FALSE) {

  # get assessment object 

  assessmentOb <- local({
    load(file.path("..", "RData backup", paste(compartmentID, "web.RData")))
    get(paste0(compartmentID, ".web"))
  })
  
  
  # get preliminary symbol and shape for each time series
  
  overview <- ctsm.xml.overview(assessmentOb$assessment, assessmentOb$classColour)

  assessmentOb <- assessmentOb$assessment    
  
  purpose <- assessmentOb$info$purpose
  timeSeries <- assessmentOb$timeSeries
  assessment <- assessmentOb$assessment
  data <- assessmentOb$data

  rm(assessmentOb)

  
  # only retain relevant determinand groups and merge overview with timeSeries data
  
  timeSeries <- timeSeries[timeSeries$detGroup %in% detGroups, ]
  
  timeSeries[c("shape", "colour")] <- overview[row.names(timeSeries), ]
  

  # CSEMP drop estuarine stations 
  
  if (purpose == "OSPAR") 
    message("Using all stations regardless of WLTYP")
  else if (purpose == "CSEMP") {
    message("Dropping estuarine stations")
    warning("Need to pass estuarine information directly to assessment object", call. = FALSE)
    infile = file.path("..", "data", "stndict_merman_041016.xlsx")
    stations <- xlsx::read.xlsx(infile, sheetName = "Fixed Stations", check.names = FALSE)
    stations <- data.frame(stations[c("New Station Code", "ICES Station Type")], 
                           row.names = "New Station Code", check.names = FALSE)
    ok <- !(stations[as.character(timeSeries$station), "ICES Station Type"] %in% "Estuary")
    timeSeries <- droplevels(timeSeries[ok, ])
  }

  
  # sort out region information and ensure ordered sensibly geographically
  
  if (purpose == "OSPAR") {
  
    # combine Dogger Bank with Southern North Sea
    # order regions from North to South

    regionID <- c("Gulf of Cadiz", "Iberian Sea", "Northern Bay of Biscay", "Celtic Sea", "Irish Sea", 
                  "Irish and Scottish West Coast", "Channel", "Southern North Sea", "Skagerrak and Kattegat", 
                  "Northern North Sea", "Norwegian Trench", "Norwegian Sea", "Greenland-Scotland ridge", 
                  "Barents Sea")    
    
    timeSeries <- within(timeSeries, {
      region[region %in% "Dogger Bank"] <- "Southern North Sea"
      stopifnot(region %in% regionID)
      region <- droplevels(factor(region, levels = regionID))
    })
    
  } else if (purpose == "CSEMP") {
    
    # drop stations outside reporting areas
    
    timeSeries <- timeSeries[!timeSeries$region %in% c("AtlanticNW", "ENorthSea", "SIrishSea"), ]

    regionID <- c("W Channel & Celtic Sea", "Irish Sea", "Minches & W Scotland", 
                  "Scottish Continental Shelf", "Atlantic & NW Approaches", 
                  "E Channel", "Southern North Sea", "Northern North Sea")

    timeSeries <- within(timeSeries, {
      region[region %in% c("Anglia", "HumWash")] <- "Southern North Sea"
      region[region %in% c("Bailey", "FaroeShetC", "Rockall")] <- "NW Approaches"
      region[region %in% c("CardBay", "Clyde", "IrishSea")] <- "Irish Sea"
      region[region %in% c("EastChan")] <- "E Channel"
      region[region %in% c("EScotland", "EShetland", "Fladen", "Forth", "Forties", "MorayF", 
                           "TyneTees")] <- "Northern North Sea"
      region[region %in% c("Hebrides", "NScotland", "WShetland")] <- "Scottish Continental Shelf"
      region[region %in% c("MinchMalin")] <- "Minches & W Scotland"
      region[region %in% c("Severn", "WestChan")] <- "W Channel & Celtic Sea"
      region <- droplevels(factor(region, levels = regionID))
    })  
        
    if (any(is.na(timeSeries$region)))
      stop("not all CSEMP regions mapped to CP2 regions")

    # create OSPARregion as temporary way of getting maps
        
    timeSeries <- within(timeSeries, {
      MSFDregion <- region
      levels(MSFDregion) <- list(
        "Celtic Seas" = c("W Channel & Celtic Sea", "Irish Sea", "Minches & W Scotland", 
                "Scottish Continental Shelf", "Atlantic & NW Approaches"), 
        "Greater North Sea" = c("E Channel", "Southern North Sea", "Northern North Sea")
      )
    })
  }    

  timeSeries <- droplevels(timeSeries) 
  
   
  # tidy up assessment and data structures
  
  assessment <- assessment[row.names(timeSeries)]
  data <- data[data$seriesID %in% row.names(timeSeries), ]
  data <- droplevels(data)

  
  # identify timeSeries for which a parametric model has been fitted
  
  if ("Imposex" %in% detGroups)
    warning("Must rationalise ad hoc approach to testing for trend and status information for imposex", 
            call. = FALSE)
  
  paramFit <- sapply(assessment, function(i) !is.null(i$pred))
  timeSeries$paramFit <- paramFit[row.names(timeSeries)]
  
  
  # get nypos to identify whether there is trend information, or just status
  
  nypos <- sapply(row.names(timeSeries), function(i) {
    detID <- timeSeries[i, "determinand"]
    summary <- assessment[[i]]$summary
    if (detID %in% "VDS" & !is.na(summary$rtrend))
        5 
    else 
      summary$nypos
  })
  
  timeSeries$nypos <- nypos[row.names(timeSeries)]

  
  # simpify MSTAT values so that only B, RH and IH
  
  MSTAT.id <- intersect(c("country", "MSTAT"), names(timeSeries))

  cat("\nTable of original MSTAT values\n\n")
  wk <- unique(timeSeries[c(MSTAT.id, "station")])
  print(do.call(table, c(wk[MSTAT.id], list(useNA = "ifany"))))
  cat("\n")

  message("Simplifying MSTAT codes to B, RH and IH")
  
  timeSeries <- within(timeSeries, {
    MSTATsubmitted <- MSTAT
    MSTAT[grepl("RH", MSTAT)] <- "RH"
    MSTAT[grepl("IH", MSTAT)] <- "IH"
    MSTAT[is.na(MSTAT) | MSTAT %in% c("", "RP", "ID")] <- "RH"
    MSTAT <- MSTAT[, drop = TRUE]
  })  
  
  cat("\nTable of simplified MSTAT values\n\n")
  wk <- unique(timeSeries[c(MSTAT.id, "station")])
  print(do.call(table, c(wk[MSTAT.id], list(useNA = "ifany"))))
  cat("\n")


  # correct MSTAT values
  
  stopifnot(timeSeries$MSTAT %in% c("B", "RH", "IH"))
  
  timeSeries <- within(timeSeries, MSTATedited <- MSTAT)

  timeSeries <- switch(
    purpose, 
    OSPAR = within(timeSeries, {
      MSTAT[country %in% c("Belgium", "Denmark", "Sweden") & MSTAT %in% "B"] <- "RH"
      if (compartmentID %in% "biota") {
        MSTAT[country %in% "United Kingdom" & station %in% "MinchMalin_BroadfordBay_sh01"] <- "RH"
      }
    }), 
    CSEMP = within(timeSeries, {
      MSTAT[station %in% "MinchMalin_BroadfordBay_sh01"] <- "RH"
    })
  )
  
  cat("\nTable of corrected MSTAT values\n\n")
  wk <- unique(timeSeries[c(MSTAT.id, "station")])
  print(do.call(table, c(wk[MSTAT.id], list(useNA = "ifany"))))
  cat("\n")


  if (!byGroup) 
    return(
      list(assessment = assessment, timeSeries = timeSeries, data = data, 
           regionID = levels(timeSeries$region), detID = levels(timeSeries$determinand))
    )
  

  # now extract relevant information for each determinand group
  
  sapply(detGroups, simplify = FALSE, FUN = function(ID) {
  
    webpath <- paste(compartmentID, ID)
  
    AC <- switch(webpath, 
                 "sediment Chlorobiphenyls" = c("EAC", "BAC"),
                 "sediment PAH (parent)" = c("ERL", "BAC"),
                 "sediment Metals" = c("ERL", "BAC"),
                 "biota Chlorobiphenyls" = c("EAC", "BAC"),
                 "biota PAH (parent)" = c("BAC", "EAC"),
                 "biota Metals" = c("BAC", "EC"),
                 "biota Imposex" = c("BAC", "EAC"),
                 NULL)
    
    detID <- switch(
      ID, 
      Chlorobiphenyls = c("CB28", "CB52", "CB101", "CB118", "CB138", "CB153", "CB180"), 
      "PAH (parent)" = c("PA", "ANT", "FLU", "PYR", "BAA", "CHR", "BAP", "BGHIP", "ICDP"), 
      "Organo-bromines" = switch(
        compartmentID, 
        sediment = c("BDE28", "BDE47", "BDE99", "BD100", "BD153", "BD154", "BD209"), 
        biota = c("BDE28", "BDE47", "BDE99", "BD100", "BD153", "BD154")), 
      "Organo-metals" = c("TBTIN", "DBTIN", "MBTIN", "TPTIN", "DPTIN", "MPTIN"),
      Metals = c("HG", "CD", "PB"), 
      Imposex = "VDS")
    

    # reduce to relevant determinand group 
    
    timeSeries <- droplevels(subset(timeSeries, determinand %in% detID))
    assessment <- assessment[row.names(timeSeries)]
    data <- droplevels(subset(data, seriesID %in% row.names(timeSeries)))

    timeSeries <- within(timeSeries, determinand <- droplevels(factor(determinand, levels = detID)))

    list(ID = webpath, purpose = purpose, assessment = assessment, timeSeries = timeSeries, data = data, 
         AC = AC, regionID = levels(timeSeries$region), detID = levels(timeSeries$determinand))
  })
}


writeData <- function(assessment) {
  path <- "data for assessors"
  ID <- assessment$ID
  if (!is.null(assessment$trend)) {
    fileName <- paste(ID, "trend all.csv")
    write.csv(assessment$trend, file.path(path, fileName), row.names = FALSE, na = "")
  }
  if (!is.null(assessment$status)) {
    fileName <- paste(ID, "status all.csv")
    write.csv(assessment$status, file.path(path, fileName), row.names = FALSE, na = "")
  }
  if (!is.null(assessment$regionalTrend)) {
    fileName <- paste(ID, "trend meta analysis.csv")
    write.csv(assessment$regionalTrend, file.path(path, fileName), row.names = FALSE, na = "")
  }
  if (!is.null(assessment$regionalStatus)) {
    fileName <- paste(ID, "status meta analysis.csv")
    write.csv(assessment$regionalStatus, file.path(path, fileName), row.names = FALSE, na = "")
  }
}





